/* (Aminet archive page residue, kept for provenance: "home *** CD-ROM | disk | FTP | other *** search" / "Wrap") */
/* GetAllHTML "URL"/A,"DestDir"/A,NOASK/S,ARC/S,PIC/S,RESUME/S,PAUSE/S,TERSE/S,
   DEPTH=/N/K,NOBASEINDEX/S,PORT=/K,BASEURL=/K,BROKENLINKS/S

   v1.00 (22-08-02) Copyright 1998-2002 Chris S Handley (email: cshandley@iee.org)
   If you alter & distribute this, please mention me as the original author!
   Do not hold your breath for the E version as I have so little spare time.
   See GetAllHTML.doc for more details.

   Recursively mirrors a web site by driving HTTPResume through its ARexx port.

   Review fixes in this revision:
   - "ExernalLink=0" typo corrected to "ExternalLink=0"; the flag was never
     actually reset, so a stale value from an earlier link could be recorded
     against later URLs.
   - "&&" corrected to "&" in the suffix check: "&&" is exclusive-OR in ARexx,
     not logical AND, so the test went wrong whenever both sides were true.
   - Switch11 is now filled in by the argument parse; it was Upper()-ed and
     compared everywhere but never assigned, so an 11th option was ignored.
   - Bare "Delay(n)" clauses changed to "CALL Delay(n)" so the function result
     is not sent to the shell as a spurious command.
*/
OPTIONS RESULTS                              /* we need Result back from host commands */
Call Addlib('rexxsupport.library',0,-30,0)   /* for Exists(), Delay(), Delete(), ... */

Say 'GetAllHTML v1.00 Copyright 1998-2002 Chris Handley (read program file for details)'

/* set-up */
HTTPResume='Programs:Utils/Comms/HTTPResume' /* path of the HTTPResume executable */
ExtDir='T:'                                  /* where external-link test downloads go */
TempFile='T:GetAllHTML'                      /* HTTPResume writes its port name here */

/* choose a temp-file name that does not clash with an existing file */
TempFileAdd = Random(1,999,Time(s))
DO UNTIL ~Exists(TempFile||TempFileAdd)
   TempFileAdd = Random(1,999,Time(s))
END
TempFile=TempFile||TempFileAdd

/* deal with args (Switch11 added to the parse - see header) */
Parse VALUE Arg(1) WITH '"' MainURL '"' . '"' DestDir '"' Switch1 Switch2 Switch3 Switch4 Switch5 Switch6 Switch7 Switch8 Switch9 Switch10 Switch11
IF (MainURL='')|(DestDir='') THEN DO
   Say 'ERROR: Empty argument(s)!'
   Say 'Usage: GetAllHTML "URL"/A,"DestDir"/A,NOASK/S,ARC/S,PIC/S,RESUME/S,PAUSE/S,TERSE/S,DEPTH=/N/K,NOBASEINDEX/S,PORT=/K,BASEURL=/K,BROKENLINKS/S'
   Say 'Note - both URL & DestDir *must* be enclosed in "double quotes".'
   Say ' - after DEPTH should be a "=" followed by a number with NO spaces between them.'
   Say ' - after PORT should be a "=" followed by a string with NO spaces between them.'
   Say ' - after BASEURL should be a "=" followed by a string with NO spaces between them.'
   Exit 20
END
IF (Right(DestDir,1)~='/')&(Right(DestDir,1)~=':') THEN DestDir=DestDir||'/'
CALL MakeDir(DestDir)                        /* ensure the destination drawer exists */
IF Left(MainURL,7)~='http://' THEN MainURL='http://'||MainURL

Switch1=Upper(Switch1); Switch2=Upper(Switch2); Switch3=Upper(Switch3)
Switch4=Upper(Switch4); Switch5=Upper(Switch5); Switch6=Upper(Switch6)
Switch7=Upper(Switch7); Switch8=Upper(Switch8); Switch9=Upper(Switch9)
Switch10=Upper(Switch10); Switch11=Upper(Switch11)

/* switch defaults */
SwNoAsk=0; SwArc=0; SwPic=0; SwResume=0; SwDepth=30; SwNoPause=1
SwPort=0; SwTerse=0; BaseURLDir=''; SwBroken=0; SwNoBaseIndex=0

IF (Switch1='NOASK')|(Switch2='NOASK')|(Switch3='NOASK')|(Switch4='NOASK')|(Switch5='NOASK')|(Switch6='NOASK')|(Switch7='NOASK')|(Switch8='NOASK')|(Switch9='NOASK')|(Switch10='NOASK')|(Switch11='NOASK') THEN SwNoAsk=1
IF (Switch1='ARC')|(Switch2='ARC')|(Switch3='ARC')|(Switch4='ARC')|(Switch5='ARC')|(Switch6='ARC')|(Switch7='ARC')|(Switch8='ARC')|(Switch9='ARC')|(Switch10='ARC')|(Switch11='ARC') THEN SwArc=1
IF (Switch1='PIC')|(Switch2='PIC')|(Switch3='PIC')|(Switch4='PIC')|(Switch5='PIC')|(Switch6='PIC')|(Switch7='PIC')|(Switch8='PIC')|(Switch9='PIC')|(Switch10='PIC')|(Switch11='PIC') THEN SwPic=1
IF (Switch1='RESUME')|(Switch2='RESUME')|(Switch3='RESUME')|(Switch4='RESUME')|(Switch5='RESUME')|(Switch6='RESUME')|(Switch7='RESUME')|(Switch8='RESUME')|(Switch9='RESUME')|(Switch10='RESUME')|(Switch11='RESUME') THEN SwResume=1
IF (Switch1='PAUSE')|(Switch2='PAUSE')|(Switch3='PAUSE')|(Switch4='PAUSE')|(Switch5='PAUSE')|(Switch6='PAUSE')|(Switch7='PAUSE')|(Switch8='PAUSE')|(Switch9='PAUSE')|(Switch10='PAUSE')|(Switch11='PAUSE') THEN SwNoPause=0
IF (Switch1='TERSE')|(Switch2='TERSE')|(Switch3='TERSE')|(Switch4='TERSE')|(Switch5='TERSE')|(Switch6='TERSE')|(Switch7='TERSE')|(Switch8='TERSE')|(Switch9='TERSE')|(Switch10='TERSE')|(Switch11='TERSE') THEN SwTerse=1
IF (Switch1='BROKENLINKS')|(Switch2='BROKENLINKS')|(Switch3='BROKENLINKS')|(Switch4='BROKENLINKS')|(Switch5='BROKENLINKS')|(Switch6='BROKENLINKS')|(Switch7='BROKENLINKS')|(Switch8='BROKENLINKS')|(Switch9='BROKENLINKS')|(Switch10='BROKENLINKS')|(Switch11='BROKENLINKS') THEN SwBroken=1
IF (Switch1='NOBASEINDEX')|(Switch2='NOBASEINDEX')|(Switch3='NOBASEINDEX')|(Switch4='NOBASEINDEX')|(Switch5='NOBASEINDEX')|(Switch6='NOBASEINDEX')|(Switch7='NOBASEINDEX')|(Switch8='NOBASEINDEX')|(Switch9='NOBASEINDEX')|(Switch10='NOBASEINDEX')|(Switch11='NOBASEINDEX') THEN SwNoBaseIndex=1

/* DEPTH=n : translate page depth into a maximum stem-name length (the
   recursion tracks its depth via the length of the "Root.x.y..." stem name) */
IF (Left(Switch1,5)='DEPTH')|(Left(Switch2,5)='DEPTH')|(Left(Switch3,5)='DEPTH')|(Left(Switch4,5)='DEPTH')|(Left(Switch5,5)='DEPTH')|(Left(Switch6,5)='DEPTH')|(Left(Switch7,5)='DEPTH')|(Left(Switch8,5)='DEPTH')|(Left(Switch9,5)='DEPTH')|(Left(Switch10,5)='DEPTH')|(Left(Switch11,5)='DEPTH') THEN DO
   Parse VALUE Upper(Arg(1)) WITH '"' . '"' . '"' . '"' . 'DEPTH=' Depth .
   IF Depth='' THEN DO
      Say 'No DEPTH number found (must use "DEPTH=x" where x is your number).'
      Say 'Search pages up to a depth of: '
      Pull Depth
   END
   IF Depth>42 THEN Depth=42             /* sanity protect against ARexx limitation */
   IF Depth<10 THEN SwDepth=Depth*2      /* each depth grows the stem by 2 (e.g.".2.3.4.5") */
   ELSE SwDepth=((Depth-9)*3)+(9*2)      /* as above but above 9 grows by 3 (e.g.".12.13.14.15") */
   SwDepth=SwDepth+5-2                   /* 5 = length of "Root." */
END

/* PORT=name : use an already-running HTTPResume ARexx port */
IF (Left(Switch1,4)='PORT')|(Left(Switch2,4)='PORT')|(Left(Switch3,4)='PORT')|(Left(Switch4,4)='PORT')|(Left(Switch5,4)='PORT')|(Left(Switch6,4)='PORT')|(Left(Switch7,4)='PORT')|(Left(Switch8,4)='PORT')|(Left(Switch9,4)='PORT')|(Left(Switch10,4)='PORT')|(Left(Switch11,4)='PORT') THEN DO
   SwPort=1
   Parse VALUE Upper(Arg(1)) WITH '"' . '"' . '"' . '"' . 'PORT=' Port .
   IF Port='' THEN DO
      Port=Address()                     /* maybe we were launched FROM HTTPResume */
      IF Left(Port,11)~='HTTPRESUME.' THEN DO
         Say 'ERROR: PORT argument was not followed by a = and a string with no spaces between (eg."PORT=HTTPResume.1"), and the host enviroment was not already HTTPResume!'
         Exit 20
      END
   END
END
ELSE Port=''

/* BASEURL=url : only URLs below this base are followed */
IF (Left(Switch1,7)='BASEURL')|(Left(Switch2,7)='BASEURL')|(Left(Switch3,7)='BASEURL')|(Left(Switch4,7)='BASEURL')|(Left(Switch5,7)='BASEURL')|(Left(Switch6,7)='BASEURL')|(Left(Switch7,7)='BASEURL')|(Left(Switch8,7)='BASEURL')|(Left(Switch9,7)='BASEURL')|(Left(Switch10,7)='BASEURL')|(Left(Switch11,7)='BASEURL') THEN DO
   Parse VALUE Upper(Arg(1)) WITH '"' . '"' . '"' . '"' . 'BASEURL=' BaseURLDir .
   IF BaseURLDir='' THEN DO
      Say 'ERROR: BASEURL argument was not followed by a = and a string with no spaces between (eg."BASEURL=www.amiga.com")!'
      Exit 20
   END
   /* recover the original (mixed-case) spelling from the raw argument string */
   BaseURLDir=SubStr(Arg(1),Index(Upper(Arg(1)),BaseURLDir),Length(BaseURLDir))
   IF Right(BaseURLDir,1)~='/' THEN BaseURLDir=BaseURLDir||'/'
END
ELSE DO
   /* default base = MainURL minus the file part */
   Parse VALUE Reverse(MainURL) WITH . '/' BaseURLDir
   IF Length(BaseURLDir)<8 THEN BaseURLDir = Reverse(MainURL) /* cases like "http://www.kosh.net" - i.e. no end slash */
   BaseURLDir=Reverse(BaseURLDir)||'/'
END

If SwResume=1 THEN Say 'NOTE: Resume mode activated!'
If SwBroken=1 THEN Say 'NOTE: Broken-link detection mode activated!'

IF Port='' THEN DO
   /* launch our own HTTPResume; it writes its ARexx port name into TempFile.
      OVERWRITE causes problems (restart from scratch if it fails in the middle) */
   Address Command 'Run >Nil: '||HTTPResume||' GUI NODATECHECK AUTORESUME STARTICONIFIED QUICKQUIT NOERRREQ RXPORTFILE='||TempFile
   Say 'Waiting for HTTPResume...'
   DO UNTIL Exists(TempFile)
      CALL Delay(25)
   END
   CALL Delay(100)                       /* give it a moment to finish writing the file */
   IF ~Open(.port, TempFile, 'READ') THEN DO
      Say 'ERROR: Could not open "'||TempFile||'"!'
      Exit 20
   END
   Port=ReadLn(.port)
   Call Close(.port)
   Call Delete(TempFile)
   IF Port='***' THEN DO                 /* HTTPResume's failure marker */
      Say 'ERROR: HTTPResume could not open it''s ARexx port!'
      Exit 20
   END
   Address(Port)
END
ELSE DO
   Address(Port)
   /* configure the existing HTTPResume instance to behave like one we launch */
   SET NODATECHECK
   SET AUTORESUME
   SET QUICKQUIT
   SET NOERRREQ
END

/* init set-up: the root of the URL tree is the start page itself */
Root.0=1
Root.1=MainURL
Root.1.HTML=1    /* the root page must be scanned for links */
ModemOnLine=0
LastSuffix=''    /* suffix of last user-confirmed download - so semi-intelligent! */

/* get all pages recursively */
Say 'Downloading & scanning pages...'
CALL DownloadList('Root.',DestDir,BaseURLDir,SwNoAsk,SwArc,SwPic,SwResume,SwDepth,SwNoPause,SwTerse,SwBroken,SwNoBaseIndex)
Say 'Finished.'

IF SwPort=0 THEN QUIT   /* we started HTTPResume ourselves, so shut it down */
Exit


/*--------------------------------------------------------------------------
** DownloadList(URLList,DestDir,BaseURLDir,SwNoAsk,SwArc,SwPic,SwResume,
**              SwDepth,SwNoPause,SwTerse,SwBroken,SwNoBaseIndex)
**
** Downloads every URL stored in the stem list named by URLList ("Root." at
** the top level; "<stem>0" holds the count), scanning each fetched HTML page
** for further links and recursing on them.  Recursion depth is bounded by
** comparing the length of the stem name against SwDepth.
*/
DownloadList: PROCEDURE EXPOSE Root. Resume. ModemOnLine LastSuffix ExtDir
   URLList=Arg(1)
   DestDir=Arg(2)
   BaseURLDir=Arg(3)
   SwNoAsk=Arg(4)
   SwArc=Arg(5)
   SwPic=Arg(6)
   SwResume=Arg(7)
   SwDepth=Arg(8)
   SwNoPause=Arg(9)
   SwTerse=Arg(10)
   SwBroken=Arg(11)
   SwNoBaseIndex=Arg(12)

   INTERPRET 'URLListSize='||URLList||'0'    /* list length lives in "<stem>0" */

   /* deal with each URL in the list in turn (unless max depth reached) */
   IF Length(URLList)>SwDepth THEN NOP
   ELSE DO
      IF URLListSize>0 THEN DO
         DO i=1 TO URLListSize
            NewURLList=URLList||i
            INTERPRET 'URL='||NewURLList
            INTERPRET 'HTMLfile='||NewURLList||'.HTML'
            INTERPRET 'ExternalLink='||NewURLList||'.EXT'
            IF ExternalLink~=1 THEN ExternalLink=0

            /* decide on relative file & path */
            IF ExternalLink=0 THEN           /* strip the base to get the local path */
               Parse VAR URL (BaseURLDir) PathFile
            ELSE DO                          /* outside normal search - keep just the file name */
               Parse VALUE Reverse(URL) WITH PathFile '/' .
               PathFile=Reverse(PathFile)
            END
            IF (Right(PathFile,1)='/')|(PathFile='') THEN DO
               PathFile=PathFile||'InDeX.hTmL' /* give filename-less pages a name */
               HTMLfile=1                    /* force attempted scanning for HTMLs */
               GuessedURL=1
            END
            ELSE GuessedURL=0
            Parse VALUE Reverse(PathFile) WITH File '/' Path
            File=Reverse(File)
            Path=Reverse(Path)
            IF Path='' THEN DO
               File=PathFile
               Path=''
            END

            /* create necessary dir(s), one path component at a time */
            PathLeft=Path
            CurPath=DestDir
            DO While PathLeft~=''
               Parse VALUE PathLeft WITH NewDir '/' PathLeft
               IF NewDir~=='' THEN DO
                  CurPath=CurPath||NewDir||'/'
                  CALL MakeDir(Left(CurPath,Length(CurPath)-1))
               END
               ELSE DO
                  IF SwTerse=0 THEN DO
                     IF SwNoPause=0 THEN DO
                        Say 'WARNING: Empty dir name in URL "'||URL||'" (press <return>)'
                        Pull Input
                     END
                     ELSE Say 'WARNING: Empty dir name in URL "'||URL||'"'
                  END
               END
            END

            IF ExternalLink=0 THEN DownloadFile=DestDir||PathFile
            ELSE DownloadFile=ExtDir||PathFile

            IF SwResume~=0 THEN DO
               /* RESUME mode: skip files already on disk (re-scanning HTML ones)
                  until we hit the point the previous run reached */
               SeenBefore=0
               RxDownloadFile=DownloadFile
               IF Resume.RxDownloadFile=1 THEN SeenBefore=1
               IF SeenBefore=0 THEN DO       /* if visited this page before then pass! */
                  IF Exists(DownloadFile) THEN DO
                     Resume.RxDownloadFile=1
                     IF HTMLfile=1 THEN DO
                        /* parse page for URLs into a list, then recurse on it */
                        CALL GetURLs(NewURLList||'.',DownloadFile,BaseURLDir,URL,SwNoAsk,SwArc,SwPic,SwNoPause,SwTerse,SwBroken,SwNoBaseIndex)
                        CALL DownloadList(NewURLList||'.',DestDir,BaseURLDir,SwNoAsk,SwArc,SwPic,SwResume,SwDepth,SwNoPause,SwTerse,SwBroken,SwNoBaseIndex)
                     END
                  END
                  ELSE DO
                     IF ExternalLink=0 THEN DO
                        SwResume=0           /* reached point did last time, now continue as before */
                        IF ModemOnLine=0 THEN DO /* halt for input only once (so can leave alone) */
                           Say 'NOTE: Reached point where left off! (press <return>)'
                           Pull Input
                        END
                        ModemOnLine=1
                     END
                  END
               END
            END
            IF (SwResume=0)|(ExternalLink=1) THEN DO
               IF ~Exists(DownloadFile) THEN DO /* if visited this page before then pass! */
                  IF ExternalLink=0 THEN DO
                     /* download file */
                     CALL GetHTML(URL,DownloadFile)
                     /* see if it was downloaded */
                     IF ~Exists(DownloadFile) THEN DO
                        IF SwTerse=0 THEN DO
                           IF GuessedURL~=1 THEN DO
                              IF SwNoPause=0 THEN DO
                                 Say 'WARNING: Couldn''t download file "'||DownloadFile||'" (press <return>)'
                                 Pull Input
                              END
                              ELSE Say 'WARNING: Couldn''t download file "'||DownloadFile||'"'
                           END
                        END
                        /* leave an empty 'fake' file so RESUME doesn't stop too early */
                        Call Open(.file, DownloadFile, 'WRITE')
                        Call Close(.file)
                     END
                     ELSE DO
                        /* scan the downloaded file if asked to */
                        IF HTMLfile=1 THEN DO
                           /* parse page for URLs into a list, then recurse on it */
                           CALL GetURLs(NewURLList||'.',DownloadFile,BaseURLDir,URL,SwNoAsk,SwArc,SwPic,SwNoPause,SwTerse,SwBroken,SwNoBaseIndex)
                           CALL DownloadList(NewURLList||'.',DestDir,BaseURLDir,SwNoAsk,SwArc,SwPic,SwResume,SwDepth,SwNoPause,SwTerse,SwBroken,SwNoBaseIndex)
                        END
                     END
                  END
                  ELSE DO
                     /* external link: download once only (re-using the RESUME stem
                        as a visited-set), purely to see whether the link is broken */
                     RxDownloadFile=DownloadFile
                     IF Resume.RxDownloadFile~=1 THEN DO
                        Resume.RxDownloadFile=1
                        CALL GetHTML(URL,DownloadFile)
                        /* re-try downloading twice, incase 'freak' connect failure */
                        IF ~Exists(DownloadFile) THEN DO
                           CALL Delay(50)
                           CALL GetHTML(URL,DownloadFile)
                           IF ~Exists(DownloadFile) THEN DO
                              CALL Delay(50)
                              CALL GetHTML(URL,DownloadFile)
                           END
                        END
                        IF ~Exists(DownloadFile) THEN DO
                           /* BrokePage = the stem naming the page that held the link;
                              INTERPRET turns it into that page's URL */
                           Parse VALUE Reverse(URLList) WITH . '.' BrokePage
                           INTERPRET 'BrokePage='||Reverse(BrokePage)
                           Say 'Found BROKEN LINK to "'||URL||'" in "'||BrokePage||'"'
                        END
                        ELSE CALL Delete(DownloadFile)
                     END
                  END
               END
            END
         END
      END
   END
   Return


/*--------------------------------------------------------------------------
** GetURLs(URLList,DownloadFile,BaseURLDir,FileURL,SwNoAsk,SwArc,SwPic,
**         SwNoPause,SwTerse,SwBroken,SwNoBaseIndex)
**
** Scans the HTML file DownloadFile (fetched from FileURL) for HREF= and
** SRC= references, normalises each URL to a full "http://" form, filters it
** (file type, base-URL scope, user confirmation) and appends the survivors
** to the stem list named by URLList.
*/
GetURLs: PROCEDURE EXPOSE Root. LastSuffix
   URLList=Arg(1)
   DownloadFile=Arg(2)
   BaseURLDir=Arg(3)
   FileURL=Arg(4)
   SwNoAsk=Arg(5)
   SwArc=Arg(6)
   SwPic=Arg(7)
   SwNoPause=Arg(8)
   SwTerse=Arg(9)
   SwBroken=Arg(10)
   SwNoBaseIndex=Arg(11)

   INTERPRET URLList||'0 = 0'                /* start with an empty list */

   /* directory of FileURL, for expanding local references like "new/0083.html" */
   Parse VALUE Reverse(FileURL) WITH . '/' LocalURLDir
   IF Length(LocalURLDir)<8 THEN LocalURLDir = Reverse(FileURL) /* cases like "http://www.kosh.net" - i.e. no end slash */
   LocalURLDir=Reverse(LocalURLDir)||'/'
   PARSE VAR LocalURLDir 'http://' LocalURLDomain '/' . /* recover domain from URL */
   LocalURLDomain='http://'||LocalURLDomain

   /* parse the (possibly) downloaded HTML file for URLs */
   IF Open(.file, DownloadFile, 'READ') THEN DO
      DO WHILE ~EOF(.file)
         Line=ReadLn(.file)
         ULine=Upper(Line)
         NewPos=0; Mode=0
         DO UNTIL NewPos<0
            /* Mode 0: anchor/area links; Mode 1: frame & image sources */
            IF Mode=0 THEN DO
               NewPos=Pos('HREF=',ULine,NewPos+1) /* finds "<AREA HREF" "<A HREF" "<A/nHREF" ... */
               IF NewPos=0 THEN DO
                  Mode=1
                  NewPos=0
               END
            END
            IF Mode=1 THEN DO
               Done=1
               NewPos=Pos('SRC=',ULine,NewPos+1) /* "SRC=" occurs for both frames & images */
               IF NewPos=0 THEN NewPos=-1    /* -1 terminates the scan of this line */
            END

            /* expand URL to full path, remove non-file parts & store if in scope */
            IF NewPos>0 THEN DO
               Parse VAR Line =NewPos '="' URL '"'
               IF URL='' THEN Parse VAR Line =NewPos '=\"' URL '"' /* javascripts precede "s by a slash */
               IF URL~=='' THEN DO
                  Parse UPPER VAR URL URLDev ':' URLRest
                  Download=1
                  IF (URLRest~=='')&(URLDev~='HTTP') THEN DO /* found e.g. "mailto:" */
                     IF SwTerse=0 THEN Say 'Found non-http link "'||URL||'"'
                     Download=0
                  END
                  IF URLDev=Upper(URL) THEN DO /* no ":" at all, so a relative URL */
                     IF Left(URL,1)='/' THEN URL=SubStr(URL,2) /* remove pre-slash */
                     /* handle "../"s (each becomes "//", resolved further below) */
                     newURL=URL
                     DO UNTIL URL=newURL
                        URL=newURL
                        Parse VAR URL newURLpre '../' newURLpost
                        IF newURLpost~='' THEN newURL=newURLpre||'//'||newURLpost
                     END
                     URL=newURL
                     /* handle "./"s ("this directory" - simply dropped) */
                     newURL=URL
                     DO UNTIL URL=newURL
                        URL=newURL
                        Parse VAR URL newURLpre './' newURLpost
                        IF newURLpost~='' THEN newURL=newURLpre||newURLpost
                     END
                     URL=newURL
                     IF Left(URL,1)='/' THEN URL=SubStr(URL,2) /* remove 1st spurious pre-slash (otherwise path wrongly interpreted) */
                     URL=LocalURLDir||URL     /* local reference -> expand to full */
                     /* if have double-slashes (go down dir), remove relevant dirs */
                     Done=0
                     DO Until Done=1          /* an algorithm with a bit of magic! */
                        URLLen=Length(URL)
                        EndDPos=Index(URL,'//',8) /* marks end of '//' (start at 8 skips "http://") */
                        IF EndDPos>0 THEN DO
                           StartDPos=Index(Reverse(URL),'/',URLLen-EndDPos+2)
                           IF StartDPos>0 THEN DO
                              StartDPos=URLLen-StartDPos+1 /* marks 1st slash before '//' */
                              URL=Left(URL,StartDPos)||SubStr(URL,EndDPos+2)
                           END
                           ELSE Done=1
                        END
                        ELSE Done=1
                     END
                  END
                  ELSE DO
                     IF URLRest=='' THEN Download=0 /* nothing after ":" */
                  END

                  IF Download=1 THEN DO
                     /* remove "#search" from "http:path/file#search" */
                     IF Index(URL,'#')~=0 THEN DO
                        Parse VALUE Reverse(URL) WITH . '#' URL
                        URL=Reverse(URL)
                     END
                     /* remove "?search" from "http:path/file?search" */
                     IF Index(URL,'?')~=0 THEN DO
                        Parse VALUE Reverse(URL) WITH . '?' URL
                        URL=Reverse(URL)
                     END
                     /* check for a suffix & that it is not part of e.g. www.amiga.com */
                     Parse VALUE Reverse(URL) WITH URLFile '/' .
                     Parse VAR URLFile Suffix '.' .
                     URLFile=Reverse(URLFile)
                     Suffix=Reverse(Suffix)
                     DirSuffix=0
                     GotSuffix=0
                     IF Suffix~=URLFile THEN DO
                        GotSuffix=1
                        IF Index(Reverse(URL),'/')>(Length(URL)-7) THEN DO
                           DirSuffix=1       /* note: recorded but not used further */
                           GotSuffix=0
                        END
                     END
                     IF (GotSuffix=0)&(Right(URL,1)~='/')&(Index(URL,'?')=0) THEN URL2=URL||'/' /* MAY need to add implicit slash */
                     ELSE URL2=''
                     ExternalLink=0          /* FIXED: was the typo "ExernalLink" */
                     IF Left(URL,Length(BaseURLDir))~==BaseURLDir THEN DO
                        IF SwBroken=0 THEN Download=0 /* don't download pages below initial dir */
                        ELSE ExternalLink=1  /* do download but no further */
                     END
                     ELSE DO
                        /* is URL the BaseURL's index (ie."baseurl/" or "baseurl/index.html")? */
                        IF SwNoBaseIndex=1 THEN DO
                           AboveBaseURLDir=Upper(Right(URL,Length(URL)-Length(BaseURLDir)))
                           IF (AboveBaseURLDir='/')|(Left(AboveBaseURLDir,6)='INDEX.') THEN Download=0
                        END
                     END

                     /* check if should download this file-type */
                     HTMLfile=0
                     IF (GotSuffix=1)&(ExternalLink=0) THEN DO /* FIXED: was "&&" (XOR in ARexx); never consider external links */
                        Suffix=Upper(Left(Suffix,3,' '))
                        /* besides always downloading HTML files, also intelligently
                           download if suffix same as last user-confirmed download */
                        IF (Suffix~='HTM')&(Suffix~='SHT')&(Suffix~='SH ')&(Suffix~=LastSuffix) THEN DO
                           Ask=1
                           Arc=0; Pic=0
                           IF (Suffix='LZX')|(Suffix='LHA')|(Suffix='ZIP')|(Suffix='LZH')|(Suffix='ZOO') THEN Arc=1
                           IF (Suffix='GIF')|(Suffix='JPG')|(Suffix='JPE')|(Suffix='PNG')|(Suffix='JFI')|(Suffix='SWF') THEN Pic=1
                           IF (Arc=1)&(SwArc=1) THEN Ask=0
                           IF (Pic=1)&(SwPic=1) THEN Ask=0
                           IF Download=1 THEN DO
                              IF Ask=1 THEN DO
                                 IF SwNoAsk=1 THEN Download=0
                                 ELSE DO
                                    Say 'QUERY: Download file "'||URL||'"?'
                                    DO Until Input~=''
                                       Pull Input
                                    END
                                    IF Left(Input,1)='N' THEN Download=0
                                    ELSE LastSuffix=Suffix
                                 END
                              END
                           END
                        END
                        ELSE HTMLfile=1
                     END

                     IF FileURL=URL THEN Download=0 /* avoid self-referencing infinite loops */

                     IF Download=1 THEN DO
                        /* store URL in list */
                        URL=Strip(URL,'T')
                        INTERPRET 'URLListSize='||URLList||'0 + 1'
                        INTERPRET URLList||'0 = URLListSize'
                        INTERPRET URLList||URLListSize||' = URL'
                        INTERPRET URLList||URLListSize||'.HTML = HTMLfile' /* record whether file should be scanned! */
                        INTERPRET URLList||URLListSize||'.EXT = ExternalLink' /* record is external link */
                        /* add 2nd possible interpretation of URL to list */
                        IF URL2~='' THEN DO
                           URL2=Strip(URL2,'T')
                           INTERPRET 'URLListSize='||URLList||'0 + 1'
                           INTERPRET URLList||'0 = URLListSize'
                           INTERPRET URLList||URLListSize||' = URL2'
                           INTERPRET URLList||URLListSize||'.HTML = HTMLfile' /* record whether file should be scanned! */
                           IF ExternalLink=1 THEN INTERPRET URLList||URLListSize||'.EXT = 1' /* record is external link */
                        END
                     END
                  END
               END
            END
         END
      END
      CALL Close(.file)
   END
   Return


/*--------------------------------------------------------------------------
** GetHTML(TheURL,File)
**
** Asks the current host (HTTPResume's ARexx port, selected by the earlier
** Address) to download TheURL into File, then polls QUERY FINISHED until
** the transfer ends.
*/
GetHTML: PROCEDURE
   TheURL=Arg(1)
   File=Arg(2)
   /* download file */
   SET OUTFILE File
   SET URL TheURL
   START
   Working=1
   DO WHILE Working>0
      CALL Delay(50)
      QUERY FINISHED
      Working=Result                     /* OPTIONS RESULTS puts the reply in Result */
   END
   Return